The original dataset is in gexf format, we need to read it and convert it to igraph.
# read network data
day1 <- read.gexf('sp_data_school_day_1_g.gexf')
day1 <- gexf.to.igraph(day1)
day2 <- read.gexf('sp_data_school_day_2_g.gexf')
day2 <- gexf.to.igraph(day2)
# merge two networks
contact <- igraph::union(day1, day2, byname = 'auto')
# read and clean attribute data
attribute <- read.table('metadata.txt', header = FALSE)
attribute$type <- ifelse(attribute$V2 == 'Teachers', 'teacher', 'student')
attribute$grade <- ifelse(attribute$type =='teacher', 'N/A', substr(attribute$V2, 1, 1))
attribute$gender <- ifelse(attribute$type =='teacher', 'N/A', as.character(attribute$V3))
attribute <- subset(attribute, select = -c(V3))
colnames(attribute) <- c('name', 'class', 'type', 'grade', 'gender')
num <- cbind(V(contact), get.vertex.attribute(contact, 'name', index = V(contact)))
num <- as.data.frame(num)
colnames(num) <- c('id', 'name')
num$name <- as.integer(as.character(num$name))
attribute$id <- left_join(attribute, num, by = 'name')$id
attribute$id <- as.integer(as.character(attribute$id))
attribute <- arrange(attribute, id)
attriname <- colnames(attribute)[2:5]
head(attribute)
## name class type grade gender id
## 1 1789 1A student 1 M 1
## 2 1780 3A student 3 M 2
## 3 1782 3A student 3 M 3
## 4 1783 1A student 1 M 4
## 5 1787 1A student 1 F 5
## 6 1546 4A student 4 F 6
# data exploration
sqldf('SELECT type, COUNT(*) FROM attribute GROUP BY type')
## type COUNT(*)
## 1 student 232
## 2 teacher 10
sqldf('SELECT grade, COUNT(*) FROM attribute WHERE grade != "N/A" GROUP BY grade')
## grade COUNT(*)
## 1 1 48
## 2 2 49
## 3 3 45
## 4 4 44
## 5 5 46
sqldf('SELECT gender, COUNT(*) FROM attribute WHERE gender != "N/A" GROUP BY gender')
## gender COUNT(*)
## 1 F 112
## 2 M 115
## 3 Unknown 5
# set vertex and edge attributes
for (i in V(contact)){
vname <- get.vertex.attribute(contact, 'name', index = i)
for (j in attriname){
vattri <- subset(attribute, name == vname)[,j]
contact <- set.vertex.attribute(contact, j, index=i, vattri)
}
}
for (i in E(contact)){
weight1 <- get.edge.attribute(contact, 'weight_1', index = i)
weight2 <- get.edge.attribute(contact, 'weight_2', index = i)
weight1 <- ifelse(is.na(weight1), 0, 1)
weight2 <- ifelse(is.na(weight2), 0, 1)
contact <- set.edge.attribute(contact, 'weight', index = i, weight1 + weight2)
start = get.vertex.attribute(contact, 'type', index = ends(contact, i)[1])
end = get.vertex.attribute(contact, 'type', index = ends(contact, i)[2])
contact <- set.edge.attribute(contact, 'same.type', index = i, as.integer(start == end))
}
contact <- delete_edge_attr(contact, 'weight_1')
contact <- delete_edge_attr(contact, 'weight_2')
summary(contact)
## IGRAPH b0cfa90 UNWB 242 8316 --
## + attr: name (v/c), class (v/n), type (v/c), grade (v/c), gender
## | (v/c), weight (e/n), same.type (e/n)
# create a network based on strong relation (weight = 2)
contact_strong <- delete.edges(contact,
E(contact)[get.edge.attribute(contact,
name = "weight") < 2])
summary(contact_strong)
## IGRAPH de7f734 UNWB 242 3122 --
## + attr: name (v/c), class (v/n), type (v/c), grade (v/c), gender
## | (v/c), weight (e/n), same.type (e/n)
# basic plot
# plot contact network
type_color = get.vertex.attribute(contact, 'type')
colors = c("#FF0000FF", "#FFFF00FF")
type_color[type_color == 'teacher'] = colors[1]
type_color[type_color == 'student'] = colors[2]
layout <- layout_nicely(contact, dim = 2)
plot(contact,
layout=layout,
vertex.color=type_color,
vertex.label=NA,
edge.arrow.size=.2,
vertex.size=5)
legend(x=-1.5, y=-1.1, c("Teacher","Student"), pch=21,
col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
# plot strong contact network
plot(contact_strong,
layout=layout,
vertex.color=type_color,
vertex.label=NA,
edge.arrow.size=.2,
vertex.size=5)
legend(x=-1.5, y=-1.1, c("Teacher","Student"), pch=21,
col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
# degree
deg <- degree(contact)
deg_strong <- degree(contact_strong)
mean(deg)
## [1] 68.72727
mean(deg_strong)
## [1] 25.80165
deg <- data.frame(type = get.vertex.attribute(contact, 'type'),
gender = get.vertex.attribute(contact, 'gender'),
grade = get.vertex.attribute(contact, 'grade'),
deg = deg)
deg_strong <- data.frame(type = get.vertex.attribute(contact_strong, 'type'),
gender = get.vertex.attribute(contact_strong, 'gender'),
grade = get.vertex.attribute(contact_strong, 'grade'),
deg = deg_strong)
# degree distribution
deg.dist = degree_distribution(contact, cumulative=F, mode="all")
plot( x=0:(length(deg.dist)-1), y=deg.dist, pch=19, cex=1, col="orange",
xlab="Degree", ylab="Frequency")
deg.strong.dist = degree_distribution(contact_strong, cumulative=F, mode="all")
plot( x=0:(length(deg.strong.dist)-1), y=deg.strong.dist, pch=19, cex=1, col="orange",
xlab="Degree", ylab="Frequency")
# degree by type
ggplot(data = deg) + geom_histogram(aes(x = deg, fill = type),bins = 15) +
facet_grid(type ~ .)
ggplot(data = deg_strong) + geom_histogram(aes(x = deg, fill = type),bins = 15) +
facet_grid(type ~ .)
# degree by gender
ggplot(data = subset(deg, gender != 'N/A' & gender != 'Unknown')) +
geom_histogram(aes(x = deg, fill = gender),bins = 15) + facet_grid(gender ~ .)
ggplot(data = subset(deg_strong, gender != 'N/A' & gender != 'Unknown')) +
geom_histogram(aes(x = deg, fill = gender),bins = 15) + facet_grid(gender ~ .)
# degree by grade
ggplot(data = subset(deg, grade != 'N/A')) +
geom_histogram(aes(x = deg, fill = grade),bins = 15) + facet_grid(grade ~ .)
ggplot(data = subset(deg_strong, grade != 'N/A')) +
geom_histogram(aes(x = deg, fill = grade),bins = 15) + facet_grid(grade ~ .)
reachability <- function(g) {
reach_mat = matrix(nrow = vcount(g),
ncol = vcount(g))
for (i in 1:vcount(g)) {
reach_mat[i,] = 0
this_node_reach <- subcomponent(g, i)
for (j in 1:(length(this_node_reach))) {
alter = this_node_reach[j]
reach_mat[i, alter] = 1
}
}
return(reach_mat)
}
reach <- reachability(contact)
reach_strong <- reachability(contact_strong)
mean(reach)
## [1] 1
mean(reach_strong)
## [1] 0.9192337
sp <- shortest.paths(contact)
sp_strong <- shortest.paths(contact_strong)
mean(sp[which(sp != Inf)])
## [1] 1.850488
mean(sp_strong[which(sp_strong != Inf)])
## [1] 4.678307
# density
graph.density(contact)
## [1] 0.2851754
graph.density(contact_strong)
## [1] 0.1070608
# transitivity
transitivity(contact)
## [1] 0.479764
transitivity(contact_strong)
## [1] 0.5492348
# diameter
diameter(contact)
## [1] 3
diameter(contact_strong)
## [1] 10
# closeness centrality
clo <- closeness(contact)
V(contact)$size <- (clo*600)^5
grade_color <- get.vertex.attribute(contact, 'grade')
colors <- rainbow(6)
grade_color[grade_color == 'N/A'] = colors[1]
grade_color[grade_color == '1'] = colors[2]
grade_color[grade_color == '2'] = colors[3]
grade_color[grade_color == '3'] = colors[4]
grade_color[grade_color == '4'] = colors[5]
grade_color[grade_color == '5'] = colors[6]
plot(contact,
layout=layout,
vertex.color=grade_color,
vertex.label=NA,
edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"),
pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
clo_strong <- closeness(contact_strong)
V(contact_strong)$size <- (clo_strong*5000)^5
plot(contact_strong,
layout=layout,
vertex.color=grade_color,
vertex.label=NA,
edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"),
pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
# betweeness centrality
bet <- betweenness(contact)
V(contact)$size <- 0.8*bet^(0.4)
plot(contact,
layout=layout,
vertex.color=grade_color,
vertex.label=NA,
edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"),
pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
bet_strong <- betweenness(contact_strong)
V(contact_strong)$size <- 0.8*bet_strong^(0.4)
plot(contact_strong,
layout=layout,
vertex.color=grade_color,
vertex.label=NA,
edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"),
pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
# Eigenvector centrality
eig <- eigen_centrality(contact)
V(contact)$size <- (10*eig$vector)
plot(contact,
layout=layout,
vertex.color=grade_color,
vertex.label=NA,
edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"),
pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
eig_strong <- eigen_centrality(contact_strong)
V(contact_strong)$size <- (15*eig_strong$vector)
plot(contact_strong,
layout=layout,
vertex.color=grade_color,
vertex.label=NA,
edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"),
pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)
# analysis
centrality <- data.frame(id = attribute$id, degree = deg$deg, type = attribute$type,
close = clo, between = bet, eigen = eig$vector)
ggplot(centrality) + geom_point(aes(x = between, y = close, color = type)) +
labs(x = "Betweenness centrality", y = "Close Centrality")
ggplot(centrality) + geom_point(aes(x = close, y =eigen, color = type)) +
labs(x = "Close Centrality", y = "Eigenvector Centrality")
ggplot(centrality) + geom_point(aes(x = between, y = eigen, color = type)) +
labs(x = "Betweenness centrality", y = "Eigenvector Centrality")
cor(select(centrality, degree, close, between, eigen))
## degree close between eigen
## degree 1.0000000 0.9421107 0.8875039 0.9711950
## close 0.9421107 1.0000000 0.8615883 0.8745694
## between 0.8875039 0.8615883 1.0000000 0.8091362
## eigen 0.9711950 0.8745694 0.8091362 1.0000000
centrality_strong <- data.frame(degree=deg_strong$deg, close = clo_strong,
between = bet_strong, eigen = eig_strong$vector)
cor(centrality_strong)
## degree close between eigen
## degree 1.0000000 0.6513993 0.7624974 0.8123224
## close 0.6513993 1.0000000 0.2832208 0.4115593
## between 0.7624974 0.2832208 1.0000000 0.5225747
## eigen 0.8123224 0.4115593 0.5225747 1.0000000
# here we only consider the contact between students and teachers
dif.type <- delete.edges(contact, E(contact)[get.edge.attribute(contact, name = "same.type")==1])
summary(dif.type)
## IGRAPH c66f729 UNWB 242 432 --
## + attr: name (v/c), class (v/n), type (v/c), grade (v/c), gender
## | (v/c), size (v/n), weight (e/n), same.type (e/n)
V(dif.type)$type <- bipartite_mapping(dif.type)$type
V(dif.type)$color <- ifelse(V(dif.type)$type, "salmon", "yellow")
V(dif.type)$shape <- ifelse(V(dif.type)$type, "square", "circle")
V(dif.type)$label <- ifelse(V(dif.type)$type, get.vertex.attribute(dif.type, 'name'), NA)
plot(dif.type, vertex.label.cex = 0.6, vertex.label.color = 'black',
layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher","Student"), pch=c(22, 21),
col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)
grade1 <- delete.vertices(dif.type,
V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=1 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade1, vertex.label.cex = 0.6, vertex.label.color = 'black',
layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 1"), pch=c(22, 21),
col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)
# teacher 1745 and 1753 are responsible for students in grade 1
grade2 <- delete.vertices(dif.type,
V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=2 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade2, vertex.label.cex = 0.6, vertex.label.color = 'black',
layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 2"), pch=c(22, 21),
col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)
# teacher 1650 and 1852 are responsible for students in grade 2
grade3 <- delete.vertices(dif.type,
V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=3 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade3, vertex.label.cex = 0.6, vertex.label.color = 'black',
layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 3"), pch=c(22, 21),
col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)
# teacher 1709 and 1746 are responsible for students in grade 3
grade4 <- delete.vertices(dif.type,
V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=4 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade4,vertex.label.cex = 0.6, vertex.label.color = 'black',
layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 4"), pch=c(22, 21),
col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)
# teacher 1521 and 1653 are responsible for students in grade 4
grade5 <- delete.vertices(dif.type,
V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=5 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade3, vertex.label.cex = 0.6, vertex.label.color = 'black',
layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 5"), pch=c(22, 21),
col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)
# teacher 1709 and 1746 are responsible for students in grade 5
# degree distribution analysis
deg <- degree(dif.type)
deg <- data.frame(deg = deg, type = get.vertex.attribute(dif.type, 'type'),
name = get.vertex.attribute(dif.type, 'name'))
deg.tea <- subset(deg, type == 'TRUE', select = c(name, deg))
mean(deg.tea$deg)
## [1] 43.2
deg.stu <- subset(deg, type == 'FALSE', select = c(name, deg))
mean(deg.stu$deg)
## [1] 1.862069
arrange(deg.tea, -deg)
## name deg
## 1 1852 54
## 2 1709 53
## 3 1650 49
## 4 1745 49
## 5 1746 46
## 6 1668 44
## 7 1653 39
## 8 1753 34
## 9 1521 33
## 10 1824 31
head(arrange(deg.stu, deg))
## name deg
## 1 1775 0
## 2 1766 0
## 3 1799 0
## 4 1774 1
## 5 1836 1
## 6 1770 1
# create same class network
same.class <- matrix(0, 242, 242)
for (i in 1:242){
for (j in 1:242){
same.class[i,j] = (subset(attribute, id == i)$class ==
subset(attribute, id == j)$class)
}
}
same.class <- graph.adjacency(same.class, mode = "undirected")
# same class block model
contact.sna <- asNetwork(contact)
contact.strong.sna <- asNetwork(contact_strong)
same.class.sna <- asNetwork(same.class)
# using same class network to build block model
eq <- equiv.clust(same.class.sna, mode="digraph")
# block model of contact network
b <- blockmodel(contact.sna, eq, k=11)
classgroup <- data.frame(name = b$plabels, block = b$block.membership)
classgroup$name <- as.integer(as.character(classgroup$name))
classgroup$block <- as.integer(as.character(classgroup$block))
classgroup <- sqldf("SELECT MIN(name) AS id, block FROM classgroup GROUP BY block")
classgroup$class <- left_join(classgroup, attribute, by = 'id')$class
bimage <- b$block.model
colnames(bimage) <- classgroup$class
rownames(bimage) <- classgroup$class
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "red") +
labs(x="", y="")
den <- graph.density(contact)
bimage[bimage < den] <- 0
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "red") +
labs(x="", y="")
set.seed(1000)
gplot(bimage, diag = TRUE,
label = colnames(bimage),
mode = "fruchtermanreingold")
# block model of strong contact network
b <- blockmodel(contact.strong.sna, eq, k=11)
classgroup <- data.frame(name = b$plabels, block = b$block.membership)
classgroup$name <- as.integer(as.character(classgroup$name))
classgroup$block <- as.integer(as.character(classgroup$block))
classgroup <- sqldf("SELECT MIN(name) AS id, block FROM classgroup GROUP BY block")
classgroup$class <- left_join(classgroup, attribute, by = 'id')$class
bimage <- b$block.model
colnames(bimage) <- classgroup$class
rownames(bimage) <- classgroup$class
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "red") +
labs(x="", y="")
den <- graph.density(contact)
bimage[bimage < den] <- 0
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "red") +
labs(x="", y="")
set.seed(1000)
gplot(bimage, diag = TRUE,
label = colnames(bimage),
mode = "fruchtermanreingold")
# same gender network
same.gender <- matrix(0, 242, 242)
for (i in 1:242){
for (j in 1:242){
same.gender[i,j] = (subset(attribute, id == i)$gender ==
subset(attribute, id == j)$gender)
}
}
same.gender <- graph.adjacency(same.gender, mode = "undirected")# same class block model
same.gender.sna <- asNetwork(same.gender)
# using same gender network to build block model
eq <- equiv.clust(same.gender.sna, mode="digraph")
# block model of contact network
b <- blockmodel(contact.sna, eq, k=4)
gendergroup <- data.frame(name = b$plabels, block = b$block.membership)
gendergroup$name <- as.integer(as.character(gendergroup$name))
gendergroup$block <- as.integer(as.character(gendergroup$block))
gendergroup <- sqldf("SELECT MIN(name) AS id, block FROM gendergroup GROUP BY block")
gendergroup$id <- as.integer(gendergroup$id)
gendergroup$gender <- left_join(gendergroup, attribute, by = 'id')$gender
bimage <- b$block.model
colnames(bimage) <- gendergroup$gender
rownames(bimage) <- gendergroup$gender
bimage <- bimage[1:2, 1:2]
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
bimage
## M F
## M 0.3542334 0.2786491
## F 0.2786491 0.2689833
ggplot(m.bimage, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "red") +
labs(x="", y="")
# block model of strong contact network
b <- blockmodel(contact.strong.sna, eq, k=4)
gendergroup <- data.frame(name = b$plabels, block = b$block.membership)
gendergroup$name <- as.integer(as.character(gendergroup$name))
gendergroup$block <- as.integer(as.character(gendergroup$block))
gendergroup <- sqldf("SELECT MIN(name) AS id, block FROM gendergroup GROUP BY block")
gendergroup$id <- as.integer(gendergroup$id)
gendergroup$gender <- left_join(gendergroup, attribute, by = 'id')$gender
bimage <- b$block.model
colnames(bimage) <- gendergroup$gender
rownames(bimage) <- gendergroup$gender
bimage <- bimage[1:2, 1:2]
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
bimage
## M F
## M 0.1261632 0.1027174
## F 0.1027174 0.1047297
ggplot(m.bimage, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "red") +
labs(x="", y="")
# number of common friends matrix
common.friends <- matrix(0, 242, 242)
for (i in 1:242){
for (j in 1:242){
common.friends[i,j] = cocitation(contact_strong, i)[j]
}
}
contact.strong.matrix <- as.matrix(get.adjacency(contact_strong))
log <- netlogit(contact.strong.matrix, common.friends,reps=100)
summary(log)
##
## Network Logit Model
##
## Coefficients:
## Estimate Exp(b) Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.8111309 0.00813865 1 0 1
## x1 0.3900841 1.47710499 1 0 0
##
## Goodness of Fit Statistics:
##
## Null deviance: 80851.46 on 58322 degrees of freedom
## Residual deviance: 14291.61 on 58320 degrees of freedom
## Chi-Squared test of fit improvement:
## 66559.85 on 2 degrees of freedom, p-value 0
## AIC: 14295.61 BIC: 14313.55
## Pseudo-R^2 Measures:
## (Dn-Dr)/(Dn-Dr+dfn): 0.5329826
## (Dn-Dr)/Dn: 0.8232363
## Contingency Table (predicted (rows) x actual (cols)):
##
## Actual
## Predicted 0 1
## 0 51152 1626
## 1 926 4618
##
## Total Fraction Correct: 0.9562429
## Fraction Predicted 1s Correct: 0.8329726
## Fraction Predicted 0s Correct: 0.9691917
## False Negative Rate: 0.26041
## False Positive Rate: 0.01778102
##
## Test Diagnostics:
##
## Null Hypothesis: qap
## Replications: 100
## Distribution Summary:
##
## (intercept) x1
## Min -143.7025 -3.6334
## 1stQ -143.5779 -0.6945
## Median -143.4592 0.4006
## Mean -143.4637 0.4265
## 3rdQ -143.3614 1.3510
## Max -143.0888 5.1278
common <- as.vector(common.friends)
relation <- as.vector(contact.strong.matrix)
b0 <- log$coefficients[1]
b1 <- log$coefficients[2]
probs <- rep(0,length(common))
for (i in 1:length(common)) {
probs[i] <- (exp(b0+b1*common[i]))/(1+exp(b0+b1*common[i]))
}
ggplot(data.frame(common, relation, probs)) +
geom_point(aes(x = common, y = probs), size = 1, color = "salmon") +
geom_point(aes(x = common, y = relation), size = 1, color = "blue") +
labs(x = "Number of Common Friends", y = "Probability")